In [1]:
%run "../0. config.ipynb"
In [2]:
# Distinct values of the "type" column found in the raw logs.
list_types = list(df_raw["type"].unique())
In [3]:
def compute_players_type_events(logs):
    """Count how many log rows of each event type every player produced.

    Parameters
    ----------
    logs : pandas.DataFrame
        Event log; must contain the ``playerId`` and ``type`` columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``playerId`` with one column per distinct ``type`` value.
        Each cell is the number of rows for that player/type pair, with 0
        where the player never produced that type.
    """
    # size() counts the rows of every (playerId, type) pair directly, so no
    # helper "count" column and no deprecated ``agg(np.sum)`` callable are
    # required.
    counts = logs.groupby(["playerId", "type"]).size()
    # Pivot the event types into columns; pairs that never occurred come out
    # as NaN and are replaced by explicit zeros.
    return counts.unstack().fillna(0)
# Show the first rows of the per-player event-type counts.
compute_players_type_events(df_raw).head(5)
Out[3]:
In [4]:
def compute_game_durations(logs):
    """Compute, per player, the time between their first and last log row.

    Parameters
    ----------
    logs : pandas.DataFrame
        Event log; must contain ``playerId`` and ``serverTime``.
        ``serverTime`` must be parseable by ``pandas.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``playerId`` with a single float column
        ``"duration (seconds)"`` holding the elapsed time between the
        earliest and latest ``serverTime`` of the player, truncated to whole
        seconds. Players without any valid timestamp get NaN.
    """
    # Parse before aggregating so min/max compare instants rather than raw
    # (possibly string) values.
    timestamps = pd.to_datetime(logs["serverTime"])
    # String aggregation names give stable "min"/"max" column labels; the
    # labels derived from np.min/np.max ("amin"/"amax") differ between
    # pandas versions.
    bounds = timestamps.groupby(logs["playerId"]).agg(["min", "max"])
    elapsed = bounds["max"] - bounds["min"]
    # Durations are never negative, so floor() reproduces the truncation to
    # whole seconds performed by the previous implementation.
    whole_seconds = np.floor(elapsed.dt.total_seconds())
    return whole_seconds.to_frame("duration (seconds)")
# Show the first rows of the per-player session durations.
compute_game_durations(df_raw).head(5)
Out[4]:
In [5]:
def max_reach(x):
    """Return the highest checkpoint number among a player's sections.

    Parameters
    ----------
    x : iterable
        Section values of one player. Float entries (NaN, i.e. missing
        sections) are ignored; every other entry must be a string whose last
        two characters are digits.

    Returns
    -------
    int
        The largest two-digit suffix found, or 0 when no section is present.
    """
    # Missing sections are float NaN. Comparing against np.nan is always
    # True, so the type check is the only filter that actually works.
    checkpoints = [int(section[-2:]) for section in x if not isinstance(section, float)]
    return max(checkpoints) if checkpoints else 0
def adventure(x):
    """Tell whether any section of a player belongs to ``adventure1``.

    Parameters
    ----------
    x : iterable
        Section values of one player; float entries (NaN) are ignored.

    Returns
    -------
    bool
        True when the text before the first ``"."`` of at least one section
        is exactly ``"adventure1"``.
    """
    # Missing sections are float NaN; a ``!= np.nan`` comparison is always
    # True, so only the type check filters them out.
    return any(
        section.split(".")[0] == "adventure1"
        for section in x
        if not isinstance(section, float)
    )
def sandbox(x):
    """Tell whether any section of a player belongs to a sandbox.

    Parameters
    ----------
    x : iterable
        Section values of one player; float entries (NaN) are ignored.

    Returns
    -------
    bool
        True when the text before the first ``"."`` of at least one section
        is ``"sandbox1"`` or ``"sandbox2"``.
    """
    # Missing sections are float NaN; a ``!= np.nan`` comparison is always
    # True, so only the type check filters them out.
    return any(
        section.split(".")[0] in ("sandbox1", "sandbox2")
        for section in x
        if not isinstance(section, float)
    )
def compute_max_reachpoint(logs):
    """Summarise every player's ``section`` values.

    Applies ``max_reach``, ``adventure`` and ``sandbox`` to the ``section``
    column of each player and returns a frame indexed by ``playerId`` with
    one column per aggregation function.
    """
    section_logs = logs.loc[:, ["playerId", "type", "section"]]
    per_player = section_logs.groupby("playerId")
    summary = per_player.agg({"section": [max_reach, adventure, sandbox]})
    # Drop the outer "section" level so only the function names remain.
    summary.columns = summary.columns.droplevel()
    return summary
# Show the first rows of the per-player section summary.
compute_max_reachpoint(df_raw).head(5)
Out[5]:
In [6]:
def compile_sessionid(raw):
    """Extract one session identifier per player.

    Parameters
    ----------
    raw : pandas.DataFrame
        Event log; must contain ``playerId`` and
        ``customData.localplayerguid``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``playerId`` with a single ``sessionId`` column holding
        the player's first non-missing guid (in row order) with all double
        quotes removed, or a missing value when the player has no guid.
    """
    guid_column = "customData.localplayerguid"

    def sessionid(guids):
        """Return the first non-missing guid of one player, quotes removed."""
        for guid in guids:
            # Missing values arrive as float NaN (or None); skip them.
            if guid is None or isinstance(guid, float):
                continue
            # Taking the first value in row order keeps the result
            # reproducible; picking an element out of a set is arbitrary when
            # a player has several distinct guids.
            return guid.replace("\"", "")
        return None

    sessions = (
        raw.loc[:, ["playerId", guid_column]]
        .groupby("playerId")
        .agg({guid_column: sessionid})
    )
    return sessions.rename(columns={guid_column: "sessionId"})
# Try the session-id extraction on the first 100 raw rows only.
compile_sessionid(df_raw[0:100]).head(5)
Out[6]:
In [7]:
# Combine all per-player tables on their shared playerId index
# (merge defaults to an inner join).
players_stats = (
    compute_players_type_events(df_raw)
    .merge(compute_game_durations(df_raw), left_index=True, right_index=True)
    .merge(compute_max_reachpoint(df_raw), left_index=True, right_index=True)
    .merge(compile_sessionid(df_raw), left_index=True, right_index=True)
)
In [8]:
# Number of players flagged for neither "adventure" nor "sandbox".
len(players_stats[players_stats["adventure"].eq(False) & players_stats["sandbox"].eq(False)])
Out[8]:
In [9]:
# Number of players flagged for both "adventure" and "sandbox".
len(players_stats[players_stats["adventure"].eq(True) & players_stats["sandbox"].eq(True)])
Out[9]:
In [10]:
# Number of players flagged for "sandbox".
len(players_stats[players_stats["sandbox"].eq(True)])
Out[10]:
In [11]:
# Show the first rows of the combined per-player table.
players_stats.head(5)
Out[11]:
In [13]:
# Write the combined per-player table next to the notebook as UTF-8 CSV.
players_stats.to_csv(
    "players_stats.csv",
    encoding="utf-8",
)